home *** CD-ROM | disk | FTP | other *** search
/ Developer CD Series 2000 November: Tool Chest / Dev.CD Nov 00 TC Disk 1.toast / Sample Code / Contributed / SpriteWorld / SpriteWorld Files / BlitPixie / Sources / BlitPixieBlitClear.c < prev    next >
Encoding:
Text File  |  2000-10-06  |  6.7 KB  |  369 lines  |  [TEXT/CWIE]

  1. ///--------------------------------------------------------------------------------------
  2. //    BlitPixieBlitClear - cache-optimized screen blitter
  3. //
  4. //    written by Anders F Björklund <afb@algonet.se>
  5. //    ©2000 afb.
  6. ///--------------------------------------------------------------------------------------
  7.  
  8. #ifndef __BLITPIXIE__
  9. #include "BlitPixieHeader.h"
  10. #endif
  11.  
  12. #include "BlitPixieAsm.h"
  13.  
  14. #pragma mark *** PowerPC asm : 
  15. #if USE_PPC_ASSEMBLY
  16.  
  17. //    NOTE:    assumes dstRowBytes is multiple of 4 (for alignment purposes)
  18. //    NOTE:    assumes bytes, rows > 0
  19.  
  20. ASM_FUNC void BlitPixieBlitClear(
  21.     register unsigned char *src,            // r3
  22.     register unsigned char *dst,            // r4
  23.     register unsigned long color,            // r5
  24.     register unsigned long srcRowBytes,        // r6
  25.     register unsigned long dstRowBytes,        // r7
  26.     register unsigned short bytes,            // r8
  27.     register unsigned short rows )            // r9
  28. {
  29. #define    r_src                r3
  30. #define    r_dst                r4
  31. #define    r_color                r5
  32. #define    r_srcRowBytes        r6
  33. #define    r_dstRowBytes        r7
  34. #define    r_width                r8
  35. #define    r_height            r9
  36.  
  37. #define    r_srcStride            r31
  38. #define    r_dstStride            r30
  39. #define    r_bytes                r29
  40. #define    r_blocks            r28
  41. #define    r_y                    r27
  42.  
  43. #define    kRegisterSaveStack    (5 * 4)
  44.  
  45.     ASM_BEGIN
  46.     stmw     r27,-kRegisterSaveStack(SP)
  47.  
  48.     stw        r_color,-(kRegisterSaveStack+8)(SP)
  49.     stw        r_color,-(kRegisterSaveStack+4)(SP)
  50.  
  51.     mr        r_bytes,r_width
  52.     mr        r_y,r_height
  53.  
  54.     sub        r_srcStride,r_srcRowBytes,r_width
  55.     sub        r_dstStride,r_dstRowBytes,r_width
  56.  
  57.     neg        r0,r4
  58.     rlwinm    r0,r0,0,27,31
  59.     sub        r_bytes,r_bytes,r0
  60.  
  61.     subi    r_src,r_src,8
  62.     subi    r_dst,r_dst,8
  63.  
  64.     lfd        fp0,-(kRegisterSaveStack+8)(SP)
  65.  
  66.     rlwinm.  r_blocks,r_bytes,27,5,31
  67.     rlwinm   r_bytes,r_bytes,0,27,31
  68.  
  69.     #define    FLAG_BLOCKS            20
  70. //    #define    FLAG_FREE            21    // note: free for use
  71.  
  72.     #define    FLAG_PRE1            26    
  73.     #define    FLAG_PRE2            25    
  74.     #define    FLAG_PRE4            24    
  75.     #define    FLAG_PRE8            23    
  76.     #define    FLAG_PRE16            22        
  77.  
  78.     #define    FLAG_POST1            31
  79.     #define    FLAG_POST2            30
  80.     #define    FLAG_POST4            29
  81.     #define    FLAG_POST8            28    
  82.     #define    FLAG_POST16            27
  83.  
  84.         rlwinm    r0,r0,27-FLAG_PRE16,FLAG_PRE16,FLAG_PRE1
  85.         rlwimi    r0,r_bytes,27-FLAG_POST16,FLAG_POST16,FLAG_POST1
  86.         mtcrf    0x07,r0            // cr5 | cr6 | cr7
  87.         crnor    FLAG_BLOCKS,0*CR_NO + CR_EQ,0*CR_NO + CR_EQ
  88.         
  89.     @rowloop:
  90.         mtctr    r_blocks
  91.             
  92.             // copy pre-block
  93.         bc        IF_NOT,FLAG_PRE1,@skip_pre1
  94.  
  95.         lbz        r0,8(r3)
  96.         addi    r3,r3,1
  97.         stb        r0,8(r4)
  98.         addi    r4,r4,1
  99.         stb        r_color,7(r3)
  100.         
  101.     @skip_pre1:
  102.         bc        IF_NOT,FLAG_PRE2,@skip_pre2
  103.  
  104.         lhz        r0,8(r3)
  105.         addi    r3,r3,2
  106.         sth        r0,8(r4)
  107.         addi    r4,r4,2
  108.         sth        r_color,6(r3)
  109.     
  110.     @skip_pre2:
  111.         bc        IF_NOT,FLAG_PRE4,@skip_pre4
  112.  
  113.         lwz        r0,8(r3)
  114.         addi    r3,r3,4
  115.         stw        r0,8(r4)
  116.         addi    r4,r4,4
  117.         stw        r_color,4(r3)
  118.     
  119.     @skip_pre4:
  120.         bc        IF_NOT,FLAG_PRE8,@skip_pre8
  121.  
  122.         lfd        fp1,8(r3)
  123.         addi    r3,r3,8
  124.         stfd    fp1,8(r4)
  125.         addi    r4,r4,8
  126.         stfd    fp0,0(r3)
  127.     
  128.     @skip_pre8:
  129.         bc        IF_NOT,FLAG_PRE16,@skip_pre16
  130.         
  131.         lfd        fp1,8(r3)
  132.         lfd        fp2,16(r3)
  133.         addi    r3,r3,16
  134.         stfd    fp1,8(r4)
  135.         stfd    fp2,16(r4)
  136.         addi    r4,r4,16
  137.         stfd    fp0,-8(r3)
  138.         stfd    fp0,0(r3)
  139.     
  140.     @skip_pre16:
  141.  
  142.             // copy blocks
  143.         bc        IF_NOT,FLAG_BLOCKS,@skipblockloop
  144.         li        r0,8
  145.         
  146.     @blockloop:
  147.         lfd        fp1,8(r3)
  148.         lfd        fp2,16(r3)
  149.         lfd        fp3,24(r3)
  150.         lfd        fp4,32(r3)
  151.  
  152.         stfd    fp1,8(r4)
  153.         stfd    fp2,16(r4)
  154.         stfd    fp3,24(r4)
  155.         stfdu    fp4,32(r4)
  156.     
  157.         stfd    fp0,8(r3)
  158.         stfd    fp0,16(r3)
  159.         stfd    fp0,24(r3)
  160.         stfdu    fp0,32(r3)
  161.         
  162.         bdnz    @blockloop
  163.     @skipblockloop:
  164.  
  165.         subic.   r_y,r_y,1
  166.         
  167.             // copy post-block
  168.         bc        IF_NOT,FLAG_POST16,@skip_post16
  169.         
  170.         lfd        fp1,8(r3)
  171.         lfd        fp2,16(r3)
  172.         addi    r3,r3,16
  173.         stfd    fp1,8(r4)
  174.         stfd    fp2,16(r4)
  175.         addi    r4,r4,16
  176.         stfd    fp0,-8(r3)
  177.         stfd    fp0,0(r3)
  178.     
  179.     @skip_post16:
  180.         bc        IF_NOT,FLAG_POST8,@skip_post8
  181.  
  182.         lfd        fp0,8(r3)
  183.         addi    r3,r3,8
  184.         stfd    fp0,8(r4)
  185.         addi    r4,r4,8
  186.         stfd    fp0,0(r3)
  187.     
  188.     @skip_post8:
  189.         bc        IF_NOT,FLAG_POST4,@skip_post4
  190.  
  191.         lwz        r0,8(r3)
  192.         addi    r3,r3,4
  193.         stw        r0,8(r4)
  194.         addi    r4,r4,4
  195.         stw        r_color,4(r3)
  196.     
  197.     @skip_post4:
  198.         bc        IF_NOT,FLAG_POST2,@skip_post2
  199.  
  200.         lhz        r0,8(r3)
  201.         addi    r3,r3,2
  202.         sth        r0,8(r4)
  203.         addi    r4,r4,2
  204.         sth        r_color,6(r3)
  205.     
  206.     @skip_post2:
  207.         bc        IF_NOT,FLAG_POST1,@skip_post1
  208.  
  209.         lbz        r0,8(r3)
  210.         addi    r3,r3,1
  211.         stb        r0,8(r4)
  212.         addi    r4,r4,1
  213.         stb        r_color,7(r3)
  214.     
  215.     @skip_post1:
  216.  
  217.         add        r3,r3,r_srcStride
  218.         add        r4,r4,r_dstStride
  219.     
  220.     bne            @rowloop
  221.  
  222.     lmw     r27,-kRegisterSaveStack(SP)
  223.     ASM_END
  224. }
  225.  
  226. #pragma mark *** 680x0 asm : 
  227. #elif USE_68K_ASSEMBLY
  228.  
  229. ASM_FUNC void BlitPixieBlitClear(
  230.     unsigned char *src,
  231.     unsigned char *dst,
  232.     unsigned long color,
  233.     unsigned long srcRowBytes,
  234.     unsigned long dstRowBytes,
  235.     unsigned short bytes,
  236.     unsigned short rows)
  237. {
  238.     #define D_color            D2
  239.     #define D_bytes            D3
  240.     #define D_rows            D4
  241.     #define D_srcBytes        D5
  242.     #define D_dstBytes        D6
  243.     
  244.     ASM_BEGIN
  245.  
  246.     MOVEM.L      D3-D6/A2,-(SP)
  247.  
  248.     MOVE.L      src,A0
  249.     MOVE.L      dst,A1
  250.     MOVE.L      color,D_color
  251.     MOVE.L      srcRowBytes,D_srcBytes
  252.     MOVE.L      dstRowBytes,D_dstBytes
  253.     MOVE.W      bytes,D_bytes
  254.     MOVE.W      rows,D_rows
  255.     
  256.     EXT.L      D_bytes
  257.     SUB.L      D_bytes,D_srcBytes
  258.     SUB.L      D_bytes,D_dstBytes
  259.      
  260.  //    *** LOOP SETUP ***
  261.     MOVEQ     #15,D0
  262.     CLR.L      D1
  263.  
  264.     MOVE.W    D_bytes,D1
  265.     LSR.W     #2,D1            //    / sizeof(long)
  266.     AND.W      D0,D1
  267.     LSR.W     #2,D1            //    * sizeof(MOVE.L    (A0),(A1)+; MOVE.L    D_color,(A0)+)
  268.     LEA          @loopend,A2
  269.     SUBA.L      D1,A2    
  270.  
  271.     MOVE.W    D_bytes,D1
  272.     LSR.W       #6,D1
  273.  
  274. // *** COPY ***
  275.  
  276. @rowloop:
  277.  
  278.              // align to word boundary
  279.             // main word copy loop
  280.         MOVE.W    D1,D0
  281.            JMP        (A2)
  282.    @loopstart:
  283.         MOVE.L    (A0),(A1)+
  284.         MOVE.L    D_color,(A0)+
  285.         MOVE.L    (A0),(A1)+
  286.         MOVE.L    D_color,(A0)+
  287.         MOVE.L    (A0),(A1)+
  288.         MOVE.L    D_color,(A0)+
  289.         MOVE.L    (A0),(A1)+
  290.         MOVE.L    D_color,(A0)+
  291.         MOVE.L    (A0),(A1)+
  292.         MOVE.L    D_color,(A0)+
  293.         MOVE.L    (A0),(A1)+
  294.         MOVE.L    D_color,(A0)+
  295.         MOVE.L    (A0),(A1)+
  296.         MOVE.L    D_color,(A0)+
  297.         MOVE.L    (A0),(A1)+
  298.         MOVE.L    D_color,(A0)+
  299.         MOVE.L    (A0),(A1)+
  300.         MOVE.L    D_color,(A0)+
  301.         MOVE.L    (A0),(A1)+
  302.         MOVE.L    D_color,(A0)+
  303.         MOVE.L    (A0),(A1)+
  304.         MOVE.L    D_color,(A0)+
  305.         MOVE.L    (A0),(A1)+
  306.         MOVE.L    D_color,(A0)+
  307.         MOVE.L    (A0),(A1)+
  308.         MOVE.L    D_color,(A0)+
  309.         MOVE.L    (A0),(A1)+
  310.         MOVE.L    D_color,(A0)+
  311.         MOVE.L    (A0),(A1)+
  312.         MOVE.L    D_color,(A0)+
  313.         MOVE.L    (A0),(A1)+
  314.         MOVE.L    D_color,(A0)+
  315.     @loopend:
  316.            DBRA    D0,@loopstart
  317.  
  318.               // do left-overs
  319.          MOVE.W    D_bytes,D0
  320.           ANDI.W    #2,D0
  321.           BEQ.S    @restword
  322.           MOVE.W    (A0),(A1)+
  323.           MOVE.W    D_color,(A0)+
  324.       @restword:
  325.         MOVE.W    D_bytes,D0
  326.           ANDI.W    #1,D0
  327.           BEQ.S    @restbyte
  328.           MOVE.B    (A0),(A1)+
  329.           MOVE.B    D_color,(A0)+
  330.       @restbyte:
  331.      
  332.     ADDA.L     D_srcBytes,A0
  333.     ADDA.L     D_dstBytes,A1
  334.  
  335.     SUBQ.W     #1,D_rows
  336.     BNE.S      @rowloop
  337.  
  338.     MOVEM.L      (SP)+,D3-D6/A2
  339.  
  340.     ASM_END
  341. }
  342.  
  343. #pragma mark *** Generic C : 
  344. #elif USE_GENERIC_C
  345.  
  //    BlitPixieBlitClear, portable C version.
  //
  //    Row by row, for `rows` rows: passes (dst, src, bytes) to BlitPixieMemCopy
  //    and then (src, color, bytes) to BlitPixieMemSet, after which src moves on
  //    by srcRowBytes and dst by dstRowBytes.
  //    BlitPixieMemCopy / BlitPixieMemSet come from the BlitPixie headers -
  //    presumably memcpy/memset-style helpers, so each source row is copied to
  //    dst and then filled with color (confirm how MemSet interprets `color`).
  //    Both rows and bytes are asserted to be non-zero.
  void BlitPixieBlitClear(
      unsigned char *src,
      unsigned char *dst,
      unsigned long color,
      unsigned long srcRowBytes,
      unsigned long dstRowBytes,
      unsigned short bytes,
      unsigned short rows)
  {
      unsigned char *srcRow = src;        // start of the current source row
      unsigned char *dstRow = dst;        // start of the current destination row
      unsigned short row;

      BLITPIXIE_ASSERT(rows > 0 );
      BLITPIXIE_ASSERT(bytes > 0 );

      for (row = 0; row < rows; row++)
      {
          // save the row first, then wipe it
          BlitPixieMemCopy( dstRow, srcRow, bytes );
          BlitPixieMemSet( srcRow, color, bytes );

          srcRow += srcRowBytes;
          dstRow += dstRowBytes;
      }
  }
  366.  
  367. #endif // GENERATING…
  368.  
  369.